Predictors: body mass interacting with habitat, effect of >10 kg.
mammal_bmr <- read.csv("data/bmr.csv")
Rename some variables and create species name from genus and species.
mammal_bmr <- mammal_bmr %>%
rename(source = Source,
order = order.corrected,
# n.animals = Number.of.Animals,
mass.kg = body.mass..kg.) %>%
mutate(order = str_to_title(order),
genus = str_to_title(genus),
animal = paste(genus, species),
above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))
Keep only variables we will be using. And “factor” “chr” variables.
mammal_bmr <- mammal_bmr %>%
dplyr::select(order,
genus,
species,
common.name,
mass.kg,
log10.body.mass,
log10.bmr,
diet,
habitat,
source,
animal,
above.10kg
) %>%
mutate(across(where(is.character), factor)) %>%
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10.bmr ~ log10.body.mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
data = mammal_bmr,
alpha = 0.5) %>%
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) %>%
gf_theme(scale_color_viridis_d('Order')) %>%
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(BMR (kcal/day))')
my_scatter_plot
plotly::ggplotly(my_scatter_plot) %>%
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat, effect of >10 kg.
lm_model <- lm(log10.bmr ~ log10.body.mass * habitat +
log10.body.mass*above.10kg, # + diet,
data = mammal_bmr)
tab_model(lm_model)
| log 10.bmr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.88 | 1.69 – 2.07 | <0.001 |
| log10.body.mass | 0.77 | 0.68 – 0.86 | <0.001 |
| habitat [land] | -0.19 | -0.37 – -0.01 | 0.041 |
| above.10kg [Smaller] | 0.04 | -0.10 – 0.19 | 0.547 |
|
log10.body.mass * habitat [land] |
0.04 | -0.06 – 0.14 | 0.470 |
|
log10.body.mass * above.10kg [Smaller] |
-0.12 | -0.21 – -0.03 | 0.006 |
| Observations | 722 | ||
| R2 / R2 adjusted | 0.961 / 0.961 | ||
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| log10.body.mass | 231.2 | 1 | 8907 | 0 |
| habitat | 0.2168 | 1 | 8.355 | 0.003963 |
| above.10kg | 0.5105 | 1 | 19.67 | 1.066e-05 |
| log10.body.mass:habitat | 0.01359 | 1 | 0.5235 | 0.4696 |
| log10.body.mass:above.10kg | 0.1961 | 1 | 7.556 | 0.006131 |
| Residuals | 18.58 | 716 | NA | NA |
For more on formatting the fitted model table see: https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html
lm_preds <- predict(lm_model, se.fit = TRUE)
mammal_bmr <- mammal_bmr %>%
mutate(lm_resids = resid(lm_model),
lm_fitted = lm_preds$fit,
lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_bmr)
acf(resid(lm_model))
gf_dhistogram(~lm_resids, data = mammal_bmr,
bins = 20) %>%
gf_fitdistr()
Any predictors not shown in a plot are held constant at their mean or most common value.
gf_line(10^lm_fitted ~ 10^log10.body.mass,
color = ~habitat,
data = mammal_bmr) %>%
gf_ribbon(10^lm_fit_lo + 10^lm_fit_hi ~ 10^log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
# gf_facet_grid(~ diet) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10'),
scale_y_continuous(trans = 'log10'))
gf_point(log10.bmr ~ lm_fitted, data = mammal_bmr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(BMR)',
x = 'Model-predicted log10(BMR)',
title = 'Linear Regresssion (no phylogeny)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Can refine the plot of predictions later on.
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat +
log10.body.mass*above.10kg + # diet +
(1 | order/genus/species),
data = mammal_bmr)
tab_model(re_model)
| log 10.bmr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.85 | 1.67 – 2.03 | <0.001 |
| log10.body.mass | 0.74 | 0.65 – 0.82 | <0.001 |
| habitat [land] | -0.25 | -0.40 – -0.09 | 0.002 |
| above.10kg [Smaller] | 0.06 | -0.07 – 0.19 | 0.346 |
|
log10.body.mass * habitat [land] |
0.05 | -0.04 – 0.14 | 0.272 |
|
log10.body.mass * above.10kg [Smaller] |
-0.10 | -0.19 – -0.02 | 0.020 |
| Random Effects | |||
| σ2 | 0.00 | ||
| τ00 species:(genus:order) | 0.01 | ||
| τ00 genus:order | 0.01 | ||
| τ00 order | 0.02 | ||
| ICC | 0.97 | ||
| N species | 626 | ||
| N genus | 415 | ||
| N order | 24 | ||
| Observations | 722 | ||
| Marginal R2 / Conditional R2 | 0.948 / 0.998 | ||
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 5370 | 1 | 0 |
| habitat | 16.96 | 1 | 3.809e-05 |
| above.10kg | 7.427 | 1 | 0.006426 |
| log10.body.mass:habitat | 1.206 | 1 | 0.2721 |
| log10.body.mass:above.10kg | 5.412 | 1 | 0.02 |
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_bmr <- mammal_bmr %>%
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_bmr, 'fitted-models/mr-data.RDS')
saveRDS(re_model, 'fitted-models/mr-re-model.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_bmr)
acf(resid(re_model))
gf_dhistogram(~re_resids, data = mammal_bmr) %>%
gf_fitdistr()
mammal_bmr <- mammal_bmr %>%
mutate(pretty_diet = case_when(diet == 'c'~ 'Carnivore',
diet == 'h' ~ 'Herbivore',
diet == 'o' ~ 'Omnivore'),
pretty_diet = factor(pretty_diet, levels = c('Herbivore', 'Omnivore', 'Carnivore')),
Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10.bmr ~ 10^log10.body.mass,
color = ~Habitat, data = mammal_bmr,
size = 0.5, alpha = 0.4,
text = ~animal) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
color = ~Habitat,
data = mammal_bmr,
text = ~animal) %>%
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) %>%
# gf_facet_grid(~ pretty_diet) %>%
gf_labs(x = 'Body Mass (kg)', y = 'BMR (kcal/day)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
) %>%
gf_theme(plot.margin = unit(c(1,1,1,1), "cm"))
# Kleiber
# BMR (Kleiber, 1947): BMR (kcal/day) = 70 * body mass (kg) ^0.75
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>%
mutate(kleiber_bmr = 70*mass^0.75)
re_preds <- re_preds %>%
gf_line(kleiber_bmr ~ mass, data = pub_models, color = 'black',
text = ~"Kleiber") %>%
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) %>%
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')
Figure 1.1: Observed and predicted BMR as a function of mass. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, and black line is based on Kleiber (1947).
re_preds
gf_point(log10.bmr ~ re_ind_fitted, data = mammal_bmr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(BMR)',
x = 'Model-predicted, Species-specific log10(BMR)',
title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?
no_species <- mammal_bmr %>%
mutate(species = NA)
re_genus_level_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL,
newdata = no_species)
gf_point(log10.bmr ~ re_genus_level_preds$fit, data = mammal_bmr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(BMR)',
x = 'Model-predicted, Genus-specific log10(BMR)',
title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Weight observations by body mass, binned into log mass bins. The goal is to reduce the undue influence of many data points collected from very small animals.
nr <- nrow(mammal_bmr)
mammal_bmr <- mammal_bmr %>%
mutate(log.mass.bin = cut_width(log10.body.mass,
width = 1,
boundary = log10(0.001))) %>%
group_by(log.mass.bin) %>%
mutate(log.mass.weights = 1 / n())
mammal_bmr <- mammal_bmr %>%
# make weights sum to nrow(mammal_bmr)
mutate(log.mass.weights = log.mass.weights / sum(mammal_bmr$log.mass.weights) * nrow(mammal_bmr)) %>%
ungroup()
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
wt_re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat +
log10.body.mass*above.10kg + # diet +
(1 | order/genus/species),
weights = log.mass.weights,
data = mammal_bmr)
tab_model(wt_re_model)
| log 10.bmr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.85 | 1.68 – 2.02 | <0.001 |
| log10.body.mass | 0.74 | 0.66 – 0.82 | <0.001 |
| habitat [land] | -0.25 | -0.40 – -0.09 | 0.002 |
| above.10kg [Smaller] | 0.06 | -0.06 – 0.19 | 0.335 |
|
log10.body.mass * habitat [land] |
0.05 | -0.04 – 0.14 | 0.261 |
|
log10.body.mass * above.10kg [Smaller] |
-0.10 | -0.18 – -0.02 | 0.019 |
| Random Effects | |||
| σ2 | 0.00 | ||
| τ00 species:(genus:order) | 0.01 | ||
| τ00 genus:order | 0.01 | ||
| τ00 order | 0.02 | ||
| ICC | 0.99 | ||
| N species | 626 | ||
| N genus | 415 | ||
| N order | 24 | ||
| Observations | 722 | ||
| Marginal R2 / Conditional R2 | 0.949 / 0.999 | ||
wt_re_anova_results <- car::Anova(wt_re_model)
pander(wt_re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 5405 | 1 | 0 |
| habitat | 17.61 | 1 | 2.712e-05 |
| above.10kg | 7.242 | 1 | 0.00712 |
| log10.body.mass:habitat | 1.264 | 1 | 0.2608 |
| log10.body.mass:above.10kg | 5.482 | 1 | 0.01922 |
Only include species with body mass of 10 kg or more.
big_mammal_bmr <- mammal_bmr %>%
filter(log10.body.mass >= log10(10)) %>%
mutate(across(where(is.factor), factor))
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
big_re_model <- glmmTMB(log10.bmr ~ log10.body.mass * habitat + # diet +
(1 | order/genus/species),
data = big_mammal_bmr)
tab_model(big_re_model)
| log 10.bmr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.92 | 1.67 – 2.16 | <0.001 |
| log10.body.mass | 0.68 | 0.58 – 0.78 | <0.001 |
| habitat [land] | -0.41 | -0.65 – -0.17 | 0.001 |
|
log10.body.mass * habitat [land] |
0.13 | 0.01 – 0.26 | 0.037 |
| Random Effects | |||
| σ2 | 0.01 | ||
| τ00 species:(genus:order) | 0.00 | ||
| τ00 genus:order | 0.01 | ||
| τ00 order | 0.04 | ||
| ICC | 0.82 | ||
| N species | 58 | ||
| N genus | 52 | ||
| N order | 12 | ||
| Observations | 60 | ||
| Marginal R2 / Conditional R2 | 0.820 / 0.968 | ||
big_re_anova_results <- car::Anova(big_re_model)
pander(big_re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 417.5 | 1 | 8.542e-93 |
| habitat | 14.49 | 1 | 0.0001409 |
| log10.body.mass:habitat | 4.372 | 1 | 0.03654 |
#Read in trees from Upham et al
tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)
all_trees <- list()
for (i in 1:length(tree_files)){
all_trees[[i]] <- read.tree(paste0(tree_path, '/',
tree_files[i]))
if (i == 1){
treeset <- all_trees[[i]]
}else{
treeset <- c(treeset, all_trees[[i]])
}
}
all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
function(x)
stringr::str_replace_all(x, pattern = '_',
replacement = ' '))
# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
if (t == 1){
tip_labs <- all_tip_labels[[t]]
}else{
tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
}
}
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_bmr %>%
mutate(animal = as.character(animal)) %>%
filter(animal %in% tip_labs) %>%
droplevels()
taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')
Fit models, one model for every tree in our list.
pgls_models <- list()
for (t in c(1:length(treeset))){
# make sure there is only one row of data per species (why sample and not average -- seems dubious??)
pgls_rep_data <- pgls_data %>%
group_by(animal) %>%
sample_n(1) %>%
ungroup
#Reduce the tree to only include those species in the data set
refit_tree <- treeset[[t]]
refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
refit_tree <- drop.tip(refit_tree,
setdiff(refit_tree$tip.label,
unique(pgls_rep_data %>% pull(animal))))
#Order the data set so that it is in the same order as the tip labels of the tree
pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
pgls_rep_data,
by = c('tree.tip.label' = 'animal'),
keep = TRUE)
# fit the model
pgls_models[[t]] <- tryCatch({
fittd <- gls(log10.bmr ~ log10.body.mass * habitat +
log10.body.mass * above.10kg, # diet ,
correlation = corPagel(value = 0.8,
phy = refit_tree,
fixed = FALSE,
form = ~animal),
data = pgls_rep_data)
},
error = function(cond){
message(paste('PGLS fit failed for tree', t))
return(NULL)
}
)
}
pgls_models <- pgls_models[which(lapply(pgls_models, is.null) == FALSE)]
Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 0.
Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.
# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
pander(pooled_pgls_summ %>% select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
| term | estimate | std.error | 2.5 % |
|---|---|---|---|
| (Intercept) | 1.689 | 0.1618 | 1.371 |
| log10.body.mass | 0.7731 | 0.03877 | 0.697 |
| habitatland | -0.1617 | 0.07007 | -0.2992 |
| above.10kgSmaller | 0.05399 | 0.05733 | -0.05856 |
| log10.body.mass:habitatland | 0.009613 | 0.04303 | -0.07488 |
| log10.body.mass:above.10kgSmaller | -0.07755 | 0.03827 | -0.1527 |
| 97.5 % | lambda | fmi |
|---|---|---|
| 2.007 | 0.002006 | 0.004831 |
| 0.8492 | 0.00122 | 0.004045 |
| -0.02411 | 0.002081 | 0.004906 |
| 0.1665 | 0.002716 | 0.005541 |
| 0.0941 | 0.001107 | 0.003932 |
| -0.002419 | 0.0011 | 0.003925 |
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
| F | num df | den df | missing info | |
|---|---|---|---|---|
| log10.body.mass | 5088 | 1 | 8093187 | 0.003375 |
| habitat | 10.73 | 1 | 10939339 | 0.002903 |
| above.10kg | 3.923 | 1 | 2403824 | 0.006194 |
| log10.body.mass:habitat | 0.0499 | 1 | 75194363 | 0.001107 |
| log10.body.mass:above.10kg | 4.107 | 1 | 76165063 | 0.0011 |
| riv | Pr(>F) | |
|---|---|---|
| log10.body.mass | 0.003387 | 0 |
| habitat | 0.002912 | 0.001056 |
| above.10kg | 0.006233 | 0.04763 |
| log10.body.mass:habitat | 0.001109 | 0.8232 |
| log10.body.mass:above.10kg | 0.001101 | 0.04271 |
Alternative approach: using MuMIn to do model averaging. This will weight models according to their information criteria scores instead of weighting them all equally. But we see here the results are nearly identical.
pgls_avg <- model.avg(pgls_models,
rank = function(x) 1)
This gives a model with the same coefficients (same model) that I think we can better make predictions from. The other way (with pool()) is better for getting the ANOVA results we wanted. Two ways of getting the same result, in different packages.
For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.
Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.
lambda <- mean(unlist(purrr::map(pgls_models,
function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.8899228
According to this simple method our estimate is \(\hat{\Lambda} =\) 0.89.
It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?
There’s a problem here in that the “pooled” model object does NOT have any information stored anymore about the variance-covariance matrix, which is key for making predictions from the model. So for making predictions we need to use package MuMIn and its model.avg() function, just weighting equally all the models from all the trees. This results in a fitted single model object from which prediction is possible with predict(). From initial inspection the coefficient estimates of the averaged model are the same as the previous version averaged with the mice package.
pgls_preds <- predict(pgls_avg,
se.fit = TRUE,
newdata = mammal_bmr)
mammal_bmr <- mammal_bmr %>%
mutate(pgls_fitted = pgls_preds$fit,
pgls_lo = pgls_preds$fit - 1.96*pgls_preds$se.fit,
pgls_hi = pgls_preds$fit + 1.96*pgls_preds$se.fit
)
gf_line(pgls_fitted ~ log10.body.mass,
color = ~habitat,
data = mammal_bmr) %>%
gf_ribbon(pgls_lo + pgls_hi ~ log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
# gf_facet_grid(~ diet) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors))
gf_point(log10.bmr ~ pgls_fitted,
data = mammal_bmr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(BMR)',
x = 'Model-predicted log10(BMR)',
title = 'PGLS Model') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Notice that the CIs are wider here than in the original linear regression model. This is because of appropriate incorporation of the fact that the observations are not independent and there’s phylogenetic signal in the data.
As far as I know though, there is not a way to make predictions for specific species or taxa that are more accurate and account for the “way” in which the species/taxon tends to deviate from the overall average. Instead, our overall uncertainty is adjusted to account for the dependence that we know is in the data. But we can’t (I don’t think) decompose it into a random part and a phylogeny part like we can with the random effects; they are both mixed inextricably together.
We could consider grouping by by body-mass ranges or phylogeny and then assess predictive accuracy as the measure of “success”
This remains to do.
Want to repeat a similar analysis with other datasets on other response variables of interest in a “physiological hierarchy”:
All these are other candidate response variables. We started with metabolism, and want to move on to these other metrics.
Main predictor of interest is habitat, controlling for body size and phylogeny: Are aquatic/marine animals different in some notable way?
Data sets for these other variables are smaller (about n = 50 species).
Predictors: body mass interacting with habitat.
mammal_fr <- read.csv("data/fr.csv")
log10fr is the base-10 logarithm of breathing frequency (units not known?)Rename some variables and create species name from genus and species.
mammal_fr <- mammal_fr %>%
rename(source = Source,
order = order.corrected,
# n.animals = Number.of.Animals,
mass.kg = body.mass..kg.) %>%
mutate(order = str_to_title(order),
genus = str_to_title(genus),
animal = paste(genus, species),
above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))
Keep only variables we will be using. And factor() “chr” variables.
mammal_fr <- mammal_fr %>%
dplyr::select(order,
genus,
species,
common.name,
mass.kg,
log10.body.mass,
log10fr,
habitat,
source,
animal,
above.10kg
) %>%
mutate(across(where(is.character), factor)) %>%
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10fr ~ log10.body.mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
data = mammal_fr,
alpha = 0.5) %>%
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) %>%
gf_theme(scale_color_viridis_d('Order')) %>%
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(fR (breaths/min))')
my_scatter_plot
plotly::ggplotly(my_scatter_plot) %>%
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?
lm_model <- lm(log10fr ~ log10.body.mass * habitat,
data = mammal_fr)
summary(lm_model)
##
## Call:
## lm(formula = log10fr ~ log10.body.mass * habitat, data = mammal_fr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0346 -0.1332 0.0048 0.1628 0.7500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.52694 0.12823 11.908 < 2e-16 ***
## log10.body.mass -0.40700 0.04817 -8.450 2.94e-13 ***
## habitatland 0.21030 0.13763 1.528 0.129768
## log10.body.mass:habitatland 0.19517 0.05445 3.584 0.000531 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2659 on 97 degrees of freedom
## Multiple R-squared: 0.7982, Adjusted R-squared: 0.792
## F-statistic: 127.9 on 3 and 97 DF, p-value: < 2.2e-16
tab_model(lm_model)
| log 10 fr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.53 | 1.27 – 1.78 | <0.001 |
| log10.body.mass | -0.41 | -0.50 – -0.31 | <0.001 |
| habitat [land] | 0.21 | -0.06 – 0.48 | 0.130 |
|
log10.body.mass * habitat [land] |
0.20 | 0.09 – 0.30 | 0.001 |
| Observations | 101 | ||
| R2 / R2 adjusted | 0.798 / 0.792 | ||
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| log10.body.mass | 9.063 | 1 | 128.1 | 1.956e-19 |
| habitat | 8.024 | 1 | 113.4 | 5.293e-18 |
| log10.body.mass:habitat | 0.9087 | 1 | 12.85 | 0.0005309 |
| Residuals | 6.861 | 97 | NA | NA |
lm_preds <- predict(lm_model, se.fit = TRUE)
mammal_fr <- mammal_fr %>% mutate(lm_resids = resid(lm_model),
lm_fitted = lm_preds$fit,
lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_fr)
s245::gf_acf(~lm_model)
gf_dhistogram(~lm_resids, data = mammal_fr,
bins = 20) %>%
gf_fitdistr()
Any predictors not shown in a plot are held constant at their mean or most common value.
gf_line(lm_fitted ~ log10.body.mass,
color = ~habitat,
data = mammal_fr) %>%
gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors))
gf_point(log10fr ~ lm_fitted, data = mammal_fr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fR)',
x = 'Model-predicted log10(fR)',
title = 'Linear Regresssion (no phylogeny)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10fr ~ log10.body.mass * habitat + (1 | order/genus/species),
data = mammal_fr)
summary(re_model)
## Family: gaussian ( identity )
## Formula:
## log10fr ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_fr
##
## AIC BIC logLik deviance df.resid
## 10.3 31.2 2.8 -5.7 93
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## species:(genus:order) (Intercept) 0.01268 0.1126
## genus:order (Intercept) 0.01781 0.1334
## order (Intercept) 0.05066 0.2251
## Residual 0.01752 0.1324
## Number of obs: 101, groups:
## species:(genus:order), 101; genus:order, 85; order, 10
##
## Dispersion estimate for gaussian family (sigma^2): 0.0175
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.53600 0.14947 10.276 < 2e-16 ***
## log10.body.mass -0.42091 0.04738 -8.884 < 2e-16 ***
## habitatland 0.05191 0.13167 0.394 0.693
## log10.body.mass:habitatland 0.23521 0.05120 4.594 4.36e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
| log 10 fr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.54 | 1.24 – 1.83 | <0.001 |
| log10.body.mass | -0.42 | -0.51 – -0.33 | <0.001 |
| habitat [land] | 0.05 | -0.21 – 0.31 | 0.693 |
|
log10.body.mass * habitat [land] |
0.24 | 0.13 – 0.34 | <0.001 |
| Random Effects | |||
| σ2 | 0.02 | ||
| τ00 species:(genus:order) | 0.01 | ||
| τ00 genus:order | 0.02 | ||
| τ00 order | 0.05 | ||
| ICC | 0.80 | ||
| N species | 98 | ||
| N genus | 85 | ||
| N order | 10 | ||
| Observations | 101 | ||
| Marginal R2 / Conditional R2 | 0.730 / 0.945 | ||
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 70.12 | 1 | 5.581e-17 |
| habitat | 88.75 | 1 | 4.474e-21 |
| log10.body.mass:habitat | 21.1 | 1 | 4.355e-06 |
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_fr <- mammal_fr %>%
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_fr, 'fitted-models/fr-data.RDS')
saveRDS(re_model, 'fitted-models/fr-re-model.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_fr)
acf(resid(re_model))
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_fr) %>%
gf_fitdistr()
mammal_fr <- mammal_fr %>%
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10fr ~ 10^log10.body.mass,
color = ~Habitat,
text = ~animal,
data = mammal_fr) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
color = ~Habitat,
data = mammal_fr,
text = ~Habitat) %>%
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) %>%
gf_labs(x = 'Body Mass (kg)', y = 'Breathing frequency\n(breaths/minute)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)))
# For breathing frequency, terrestrial (Stahl 1967): fR (breaths/minute) = 53.5 * body mass ^-0.26
# For breathing frequency, marine (Mortola, 2006): fR (breaths/minute) = 33 * body mass ^-0.42
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # mass = seq(from = 0.015, by = 0.1, to = 42000)) %>%
mutate(fr_stahl = 53.5*mass^-0.26,
fr_mortola = 33*mass^-0.42)
re_preds <- re_preds %>%
gf_line(fr_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl') %>%
gf_line(fr_mortola ~ mass, data = pub_models, color = 'black', linetype = 'dashed',
text = ~'Mortola') %>%
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.9),
legend.title = element_blank()) %>%
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')
Figure 2.1: Observed and predicted breathing frequency as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) for terrestrial species, and dashed black line on Mortola (2006) for marine species.
gf_point(log10fr ~ re_ind_fitted, data = mammal_fr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fr)',
x = 'Model-predicted, Species-specific log10(fr)',
title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?
no_species <- mammal_fr %>%
mutate(species = NA)
re_genus_level_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL,
newdata = no_species)
gf_point(log10fr ~ re_genus_level_preds$fit, data = mammal_fr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fH)',
x = 'Model-predicted, Genus-specific log10(fr)',
title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
#Read in the trees from Upham et al
#Upham et al provide four sets of 10000 trees
# ??? There are 101 files of trees in the directory?
# original code sampled 4x10 trees from a set of 100
# here I am just using all 101 that are there, is that oK?
tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)
all_trees <- list()
for (i in 1:length(tree_files)){
all_trees[[i]] <- read.tree(paste0(tree_path, '/',
tree_files[i]))
if (i == 1){
treeset <- all_trees[[i]]
}else{
treeset <- c(treeset, all_trees[[i]])
}
}
all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
function(x)
stringr::str_replace_all(x, pattern = '_',
replacement = ' '))
# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
if (t == 1){
tip_labs <- all_tip_labels[[t]]
}else{
tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
}
}
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_fr %>%
filter(animal %in% tip_labs) %>%
droplevels()
taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')
Fit models, one model for every tree in our list.
pgls_models <- list()
for (t in c(1:length(treeset))){
# make sure there is only one row of data per species (seems dubious??)
pgls_rep_data <- pgls_data %>%
group_by(animal) %>%
sample_n(1) %>%
ungroup
#Reduce the tree to only include those species in the data set
refit_tree <- treeset[[t]]
refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
refit_tree <- drop.tip(refit_tree,
setdiff(refit_tree$tip.label,
levels(pgls_rep_data %>% pull(animal))))
#Order the data set so that it is in the same order as the tip labels of the tree
pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
pgls_rep_data,
by = c('tree.tip.label' = 'animal'),
keep = TRUE)
# fit the model
pgls_models[[t]] <- tryCatch({
fittd <- gls(log10fr ~ log10.body.mass * habitat,
correlation = corPagel(value = 0.8,
phy = refit_tree,
fixed = FALSE,
form = ~animal),
data = pgls_rep_data)
},
error = function(cond){
message(paste('PGLS fit failed for tree', t))
return(NULL)
}
)
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]
Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 0.
Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.
# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)
# # pool summarise the models using Rubin's rule corrected for small samples
#summary(pgls_mira)
pooled_pgls <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
#pooled_pgls_summ <- summary(pool(pgls_mira, type = 'all', conf.int = TRUE))
pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
| term | estimate | std.error | 2.5 % | 97.5 % |
|---|---|---|---|---|
| (Intercept) | 1.609 | 0.1827 | 1.247 | 1.972 |
| log10.body.mass | -0.415 | 0.04703 | -0.5084 | -0.3216 |
| habitatland | 0.1285 | 0.1379 | -0.1453 | 0.4024 |
| log10.body.mass:habitatland | 0.1935 | 0.05439 | 0.08551 | 0.3015 |
| lambda | fmi |
|---|---|
| 0.001209 | 0.0216 |
| 0.0006789 | 0.02107 |
| 0.001403 | 0.0218 |
| 0.0139 | 0.03429 |
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
| F | num df | den df | missing info | riv | |
|---|---|---|---|---|---|
| log10.body.mass | 145.8 | 1 | 42780 | 0.04647 | 0.04873 |
| habitat | 85.92 | 1 | 13315 | 0.08337 | 0.09095 |
| log10.body.mass:habitat | 12.66 | 1 | 477466 | 0.0139 | 0.0141 |
| Pr(>F) | |
|---|---|
| log10.body.mass | 1.644e-33 |
| habitat | 2.156e-20 |
| log10.body.mass:habitat | 0.0003741 |
Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally.
pgls_avg <- model.avg(pgls_models,
rank = function(x) 1)
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.7097979
Predictors: body mass interacting with habitat.
mammal_hr <- read.csv("data/hr.csv")
log10hr is the base-10 logarithm of the heart rate in beats/minRename some variables and create species name from genus and species.
mammal_hr <- mammal_hr %>%
rename(source = Source,
order = order.corrected,
# n.animals = Number.of.Animals,
mass.kg = body.mass..kg.) %>%
mutate(order = str_to_title(order),
genus = str_to_title(genus),
animal = paste(genus, species),
above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))
Keep only variables we will be using. And “factor” “chr” variables.
mammal_hr <- mammal_hr %>%
dplyr::select(order,
genus,
species,
common.name,
mass.kg,
log10.body.mass,
log10hr,
habitat,
source,
animal,
above.10kg
) %>%
mutate(across(where(is.character), factor)) %>%
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10hr ~ log10.body.mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
data = mammal_hr,
alpha = 0.5) %>%
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) %>%
gf_theme(scale_color_viridis_d('Order')) %>%
gf_labs(x = 'Log10(fH (beats/min))',
y = 'Log10(BMR (kcal/day))')
my_scatter_plot
plotly::ggplotly(my_scatter_plot) %>%
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?
lm_model <- lm(log10hr ~ log10.body.mass * habitat,
data = mammal_hr)
summary(lm_model)
##
## Call:
## lm(formula = log10hr ~ log10.body.mass * habitat, data = mammal_hr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35008 -0.09705 -0.00279 0.07544 0.46383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.30719 0.04970 46.419 <2e-16 ***
## log10.body.mass -0.19865 0.01931 -10.288 <2e-16 ***
## habitatland 0.01562 0.05638 0.277 0.782
## log10.body.mass:habitatland -0.01550 0.02415 -0.642 0.523
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1371 on 85 degrees of freedom
## Multiple R-squared: 0.8096, Adjusted R-squared: 0.8028
## F-statistic: 120.4 on 3 and 85 DF, p-value: < 2.2e-16
tab_model(lm_model)
| log 10 hr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 2.31 | 2.21 – 2.41 | <0.001 |
| log10.body.mass | -0.20 | -0.24 – -0.16 | <0.001 |
| habitat [land] | 0.02 | -0.10 – 0.13 | 0.782 |
|
log10.body.mass * habitat [land] |
-0.02 | -0.06 – 0.03 | 0.523 |
| Observations | 89 | ||
| R2 / R2 adjusted | 0.810 / 0.803 | ||
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| log10.body.mass | 6.081 | 1 | 323.3 | 1.043e-30 |
| habitat | 0.003741 | 1 | 0.1989 | 0.6567 |
| log10.body.mass:habitat | 0.007742 | 1 | 0.4116 | 0.5229 |
| Residuals | 1.599 | 85 | NA | NA |
lm_preds <- predict(lm_model, se.fit = TRUE)
mammal_hr <- mammal_hr %>% mutate(lm_resids = resid(lm_model),
lm_fitted = lm_preds$fit,
lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_hr)
s245::gf_acf(~lm_model)
gf_dhistogram(~lm_resids, data = mammal_hr,
bins = 17) %>%
gf_fitdistr()
Any predictors not shown in a plot are held constant at their mean or most common value.
gf_line(lm_fitted ~ log10.body.mass,
color = ~habitat,
data = mammal_hr) %>%
gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors))
gf_point(log10hr ~ lm_fitted, data = mammal_hr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fH)',
x = 'Model-predicted log10(fH)',
title = 'Linear Regresssion (no phylogeny)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Can refine the plot of predictions later on.
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10hr ~ log10.body.mass * habitat + (1 | order/genus),
data = mammal_hr)
summary(re_model)
## Family: gaussian ( identity )
## Formula: log10hr ~ log10.body.mass * habitat + (1 | order/genus)
## Data: mammal_hr
##
## AIC BIC logLik deviance df.resid
## -92.5 -75.1 53.2 -106.5 82
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## genus:order (Intercept) 5.435e-03 7.372e-02
## order (Intercept) 2.571e-12 1.604e-06
## Residual 1.271e-02 1.128e-01
## Number of obs: 89, groups: genus:order, 69; order, 10
##
## Dispersion estimate for gaussian family (sigma^2): 0.0127
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.30472 0.04946 46.59 <2e-16 ***
## log10.body.mass -0.19728 0.01945 -10.14 <2e-16 ***
## habitatland 0.01622 0.05535 0.29 0.769
## log10.body.mass:habitatland -0.01504 0.02417 -0.62 0.534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
| log 10 hr | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 2.30 | 2.21 – 2.40 | <0.001 |
| log10.body.mass | -0.20 | -0.24 – -0.16 | <0.001 |
| habitat [land] | 0.02 | -0.09 – 0.12 | 0.769 |
|
log10.body.mass * habitat [land] |
-0.02 | -0.06 – 0.03 | 0.534 |
| Random Effects | |||
| σ2 | 0.01 | ||
| τ00 genus:order | 0.01 | ||
| τ00 order | 0.00 | ||
| N genus | 69 | ||
| N order | 10 | ||
| Observations | 89 | ||
| Marginal R2 / Conditional R2 | 0.857 / NA | ||
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 298 | 1 | 8.87e-67 |
| habitat | 0.1295 | 1 | 0.7189 |
| log10.body.mass:habitat | 0.3871 | 1 | 0.5338 |
Note: for this model the random effect of species is excluded as we don’t have enough species with more than one observation to fit it well.
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_hr <- mammal_hr %>%
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_hr, 'fitted-models/hr-data.RDS')
saveRDS(re_model, 'fitted-models/hr-re-model.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_hr)
acf(resid(re_model))
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_hr, bins = 15) %>%
gf_fitdistr()
mammal_hr <- mammal_hr %>%
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10hr ~ 10^log10.body.mass,
color = ~Habitat,
text = ~animal,
data = mammal_hr) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
color = ~Habitat,
data = mammal_hr,
text = ~Habitat) %>%
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) %>%
gf_labs(x = 'Body Mass (kg)', y = 'Heart Rate (beats/minute)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)
),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
)
# For heart rate (Stahl 1967): fH (beats/minute)= 241 * body mass (kg) ^-0.25
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.009, by = 0.1, to = 70000)) %>%
mutate(hr_stahl = 241*mass^-0.25)
re_preds <- re_preds %>%
gf_line(hr_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl') %>%
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.9),
legend.title = element_blank()) %>%
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')
Figure 3.1: Observed and predicted heart rate as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).
gf_point(log10hr ~ re_ind_fitted, data = mammal_hr,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fH)',
x = 'Model-predicted, Species-specific log10(fH)',
title = 'Mixed-effects Model (RE of Order/Genus)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
#Read in the trees from Upham et al
tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)
all_trees <- list()
for (i in 1:length(tree_files)){
all_trees[[i]] <- read.tree(paste0(tree_path, '/',
tree_files[i]))
if (i == 1){
treeset <- all_trees[[i]]
}else{
treeset <- c(treeset, all_trees[[i]])
}
}
all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
function(x)
stringr::str_replace_all(x, pattern = '_',
replacement = ' '))
# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
if (t == 1){
tip_labs <- all_tip_labels[[t]]
}else{
tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
}
}
# keep only the species in mammal_hr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_hr %>%
filter(animal %in% tip_labs) %>%
droplevels()
taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')
Fit models, one model for every tree in our list.
pgls_models <- list()
for (t in c(1:length(treeset))){
# make sure there is only one row of data per species (seems dubious??)
pgls_rep_data <- pgls_data %>%
group_by(animal) %>%
sample_n(1) %>%
ungroup
#Reduce the tree to only include those species in the data set
refit_tree <- treeset[[t]]
refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
refit_tree <- drop.tip(refit_tree,
setdiff(refit_tree$tip.label,
levels(pgls_rep_data %>% pull(animal))))
#Order the data set so that it is in the same order as the tip labels of the tree
pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
pgls_rep_data,
by = c('tree.tip.label' = 'animal'),
keep = TRUE)
# fit the model
pgls_models[[t]] <- tryCatch({
fittd <- gls(log10hr ~ log10.body.mass * habitat,
correlation = corPagel(value = 0.8,
phy = refit_tree,
fixed = FALSE,
form = ~animal),
data = pgls_rep_data)
},
error = function(cond){
message(paste('PGLS fit failed for tree', t))
return(NULL)
}
)
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]
Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 22.
Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.
# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
| term | estimate | std.error | 2.5 % | 97.5 % |
|---|---|---|---|---|
| (Intercept) | 2.286 | 0.06094 | 2.164 | 2.408 |
| log10.body.mass | -0.1995 | 0.01916 | -0.2376 | -0.1613 |
| habitatland | 0.01181 | 0.05737 | -0.1024 | 0.126 |
| log10.body.mass:habitatland | -0.01428 | 0.02437 | -0.06279 | 0.03423 |
| lambda | fmi |
|---|---|
| 0.1949 | 0.2194 |
| 0.01253 | 0.0366 |
| 0.02457 | 0.04864 |
| 0.01704 | 0.04111 |
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
| F | num df | den df | missing info | |
|---|---|---|---|---|
| log10.body.mass | 276.9 | 1 | 66998 | 0.03241 |
| habitat | 0.2097 | 1 | 41740 | 0.04107 |
| log10.body.mass:habitat | 0.3432 | 1 | 242183 | 0.01704 |
| riv | Pr(>F) | |
|---|---|---|
| log10.body.mass | 0.0335 | 4.708e-62 |
| habitat | 0.04283 | 0.647 |
| log10.body.mass:habitat | 0.01734 | 0.558 |
Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally?
pgls_avg <- model.avg(pgls_models,
rank = function(x) 1)
This object is one that we can better make predictions from. The other way (with pool()) is better for getting the ANOVA results.
For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.
Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.01304882
According to this simple method our estimate is \(\hat{\Lambda} =\) 0.013.
It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?
Predictors: body mass interacting with habitat.
mammal_sv <- read.csv("data/sv.csv")
log10sv is the base-10 logarithm of stroke volume in mlRename some variables and create species name from genus and species.
mammal_sv <- mammal_sv %>%
rename(source = Source,
order = order.corrected,
mass.kg = body.mass..kg.) %>%
mutate(order = str_to_title(order),
genus = str_to_title(genus),
animal = paste(genus, species),
above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))
Keep only variables we will be using. And factor() “chr” variables.
mammal_sv <- mammal_sv %>%
dplyr::select(order,
genus,
species,
common.name,
mass.kg,
log10.body.mass,
log10sv,
habitat,
source,
animal,
above.10kg
) %>%
mutate(across(where(is.character), factor)) %>%
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10sv ~ log10.body.mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
data = mammal_sv,
alpha = 0.5) %>%
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) %>%
gf_theme(scale_color_viridis_d('Order')) %>%
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(SV (ml))')
my_scatter_plot
plotly::ggplotly(my_scatter_plot) %>%
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat. Weight by number of individuals (if have data later, don’t have it now)?
lm_model <- lm(log10sv ~ log10.body.mass * habitat,
data = mammal_sv)
summary(lm_model)
##
## Call:
## lm(formula = log10sv ~ log10.body.mass * habitat, data = mammal_sv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38559 -0.09944 -0.03216 0.14260 0.36045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.16485 0.20949 0.787 0.439
## log10.body.mass 0.99959 0.09295 10.754 7.25e-11 ***
## habitatland -0.19610 0.21600 -0.908 0.373
## log10.body.mass:habitatland 0.04781 0.09705 0.493 0.627
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1745 on 25 degrees of freedom
## Multiple R-squared: 0.9853, Adjusted R-squared: 0.9836
## F-statistic: 559.1 on 3 and 25 DF, p-value: < 2.2e-16
tab_model(lm_model)
| log 10 sv | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.16 | -0.27 – 0.60 | 0.439 |
| log10.body.mass | 1.00 | 0.81 – 1.19 | <0.001 |
| habitat [land] | -0.20 | -0.64 – 0.25 | 0.373 |
|
log10.body.mass * habitat [land] |
0.05 | -0.15 – 0.25 | 0.627 |
| Observations | 29 | ||
| R2 / R2 adjusted | 0.985 / 0.984 | ||
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| log10.body.mass | 46.39 | 1 | 1524 | 6.308e-24 |
| habitat | 0.04624 | 1 | 1.519 | 0.2292 |
| log10.body.mass:habitat | 0.007388 | 1 | 0.2427 | 0.6265 |
| Residuals | 0.761 | 25 | NA | NA |
lm_preds <- predict(lm_model, se.fit = TRUE)
mammal_sv <- mammal_sv %>% mutate(lm_resids = resid(lm_model),
lm_fitted = lm_preds$fit,
lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_sv)
acf(resid(lm_model))
gf_dhistogram(~lm_resids, data = mammal_sv,
bins = 7) %>%
gf_fitdistr()
Any predictors not shown in a plot are held constant at their mean or most common value.
gf_line(lm_fitted ~ log10.body.mass,
color = ~habitat,
data = mammal_sv) %>%
gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors))
gf_point(log10sv ~ lm_fitted, data = mammal_sv,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(fH)',
x = 'Model-predicted log10(fH)',
title = 'Linear Regresssion (no phylogeny)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Can refine the plot of predictions later on.
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10sv ~ log10.body.mass * habitat +
(1 | order/genus/species),
data = mammal_sv)
summary(re_model)
## Family: gaussian ( identity )
## Formula:
## log10sv ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_sv
##
## AIC BIC logLik deviance df.resid
## -9.5 1.5 12.7 -25.5 21
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## species:(genus:order) (Intercept) 1.740e-03 4.172e-02
## genus:order (Intercept) 2.360e-02 1.536e-01
## order (Intercept) 8.935e-11 9.452e-06
## Residual 1.579e-03 3.973e-02
## Number of obs: 29, groups:
## species:(genus:order), 29; genus:order, 27; order, 9
##
## Dispersion estimate for gaussian family (sigma^2): 0.00158
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.16485 0.19699 0.837 0.403
## log10.body.mass 0.99959 0.08741 11.436 <2e-16 ***
## habitatland -0.17116 0.20442 -0.837 0.402
## log10.body.mass:habitatland 0.03755 0.09173 0.409 0.682
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
| log 10 sv | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 0.16 | -0.22 – 0.55 | 0.403 |
| log10.body.mass | 1.00 | 0.83 – 1.17 | <0.001 |
| habitat [land] | -0.17 | -0.57 – 0.23 | 0.402 |
|
log10.body.mass * habitat [land] |
0.04 | -0.14 – 0.22 | 0.682 |
| Random Effects | |||
| σ2 | 0.00 | ||
| τ00 species:(genus:order) | 0.00 | ||
| τ00 genus:order | 0.02 | ||
| τ00 order | 0.00 | ||
| N species | 28 | ||
| N genus | 27 | ||
| N order | 9 | ||
| Observations | 29 | ||
| Marginal R2 / Conditional R2 | 0.999 / NA | ||
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 1520 | 1 | 0 |
| habitat | 1.576 | 1 | 0.2093 |
| log10.body.mass:habitat | 0.1676 | 1 | 0.6823 |
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_sv <- mammal_sv %>%
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
gf_point(re_resids ~ re_ind_fitted, data = mammal_sv)
# save fitted model and data
saveRDS(mammal_sv, 'fitted-models/sv-data.RDS')
saveRDS(re_model, 'fitted-models/sv-re-model.RDS')
acf(resid(re_model))
acf(resid(re_model))
gf_dhistogram(~re_resids, data = mammal_sv,
bins = 7) %>%
gf_fitdistr()
mammal_sv <- mammal_sv %>%
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10sv ~ 10^log10.body.mass,
color = ~Habitat,
text = ~animal,
data = mammal_sv) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
color = ~Habitat,
data = mammal_sv,
text = ~Habitat) %>%
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) %>%
gf_labs(x = 'Body Mass (kg)', y = 'Stroke Volume (mL/beat)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
)
# For stroke volume (Stahl 1967, using prediction equations from Stahl for cardiac output (ml/min) and heat rate(beats/min)): SV (ml/beat) = [187 * body mass(kg)^0.81]/[241 * body mass (kg) ^-0.25]
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.029, by = 0.1, to = 5000)) %>%
mutate(sv_stahl = (187*mass^0.81) / (241 * mass^-0.25))
re_preds <- re_preds %>%
gf_line(sv_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl') %>%
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) %>%
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')
Figure 4.1: Observed and predicted stroke volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).
gf_point(log10sv ~ re_ind_fitted, data = mammal_sv,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(sv)',
x = 'Model-predicted, Species-specific log10(sv)',
title = 'Mixed-effects Model (RE of Order/Genus/Species)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
#Read in the trees from Upham et al
tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)
all_trees <- list()
for (i in 1:length(tree_files)){
all_trees[[i]] <- read.tree(paste0(tree_path, '/',
tree_files[i]))
if (i == 1){
treeset <- all_trees[[i]]
}else{
treeset <- c(treeset, all_trees[[i]])
}
}
all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
function(x)
stringr::str_replace_all(x, pattern = '_',
replacement = ' '))
# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
if (t == 1){
tip_labs <- all_tip_labels[[t]]
}else{
tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
}
}
# keep only the species in mammal_sv that are in all the trees
# on 6/17 this removes NO species.
pgls_data <- mammal_sv %>%
mutate(animal = as.character(animal)) %>%
filter(animal %in% tip_labs) %>%
droplevels()
taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')
Fit models, one model for every tree in our list.
pgls_models <- list()
for (t in c(1:length(treeset))){
# make sure there is only one row of data per species
pgls_rep_data <- pgls_data %>%
group_by(animal) %>%
sample_n(1) %>%
ungroup
#Reduce the tree to only include those species in the data set
refit_tree <- treeset[[t]]
refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
refit_tree <- drop.tip(refit_tree,
setdiff(refit_tree$tip.label,
unique(pgls_rep_data %>% pull(animal))))
#Order the data set so that it is in the same order as the tip labels of the tree
pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
pgls_rep_data,
by = c('tree.tip.label' = 'animal'),
keep = TRUE)
# fit the model
pgls_models[[t]] <- tryCatch({
fittd <- gls(log10sv ~ log10.body.mass * habitat,
correlation = corPagel(value = 0.8,
phy = refit_tree,
fixed = FALSE,
form = ~animal),
data = pgls_rep_data)
},
error = function(cond){
message(paste('PGLS fit failed for tree', t))
return(NULL)
}
)
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]
Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 4.
Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.
# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
| term | estimate | std.error | 2.5 % | 97.5 % |
|---|---|---|---|---|
| (Intercept) | 0.2041 | 0.21 | -0.2354 | 0.6436 |
| log10.body.mass | 0.9929 | 0.09319 | 0.7988 | 1.187 |
| habitatland | -0.2378 | 0.2206 | -0.6989 | 0.2234 |
| log10.body.mass:habitatland | 0.05945 | 0.09692 | -0.1422 | 0.2611 |
| lambda | fmi |
|---|---|
| 0.1758 | 0.2507 |
| 0.1154 | 0.1908 |
| 0.1604 | 0.2354 |
| 0.09824 | 0.1737 |
# this depends on inner workings of package car found in gls-utils.R
# it used to work without all this, in summer 2021 stopped
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
| F | num df | den df | missing info | riv | |
|---|---|---|---|---|---|
| log10.body.mass | 1419 | 1 | 6153 | 0.1201 | 0.1365 |
| habitat | 1.953 | 1 | 2290 | 0.1972 | 0.2457 |
| log10.body.mass:habitat | 0.3762 | 1 | 9181 | 0.09824 | 0.1089 |
| Pr(>F) | |
|---|---|
| log10.body.mass | 1.219e-279 |
| habitat | 0.1624 |
| log10.body.mass:habitat | 0.5397 |
Alternative approach: using MuMIn to do model averaging. Was it right to weight all of the models/trees equally?
pgls_avg <- model.avg(pgls_models,
rank = function(x) 1)
This gives a model with the same coefficients (same model) that we can better make predict()ions from.
For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.
Simple approach: take the mean of the estimates of \(\Lambda\) from all the individual fitted models.
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] -0.110112
It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?
Predictors: body mass interacting with habitat.
mammal_vt <- read.csv("data/vt.csv")
log10vt is the base-10 logarithm of tidal volume in mlRename some variables and create species name from genus and species.
mammal_vt <- mammal_vt %>%
rename(source = Source,
order = order.corrected,
mass.kg = body.mass..kg.) %>%
mutate(order = str_to_title(order),
genus = str_to_title(genus),
animal = paste(genus, species),
above.10kg = ifelse(mass.kg >= 10, 'Larger', 'Smaller'))
Keep only variables we will be using. And factor() “chr” variables.
mammal_vt <- mammal_vt %>%
dplyr::select(order,
genus,
species,
common.name,
mass.kg,
log10.body.mass,
log10vt,
habitat,
source,
animal,
above.10kg
) %>%
mutate(across(where(is.character), factor)) %>%
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10vt ~ log10.body.mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
data = mammal_vt,
alpha = 0.5) %>%
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) %>%
gf_theme(scale_color_viridis_d('Order')) %>%
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(Vt (liter))')
my_scatter_plot
plotly::ggplotly(my_scatter_plot) %>%
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Will not account for phylogeny at all in the model structure. Predictors: body mass interacting with habitat.
lm_model <- lm(log10vt ~ log10.body.mass * habitat,
data = mammal_vt)
tab_model(lm_model)
| log 10 vt | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.60 | 1.22 – 1.98 | <0.001 |
| log10.body.mass | 0.94 | 0.80 – 1.08 | <0.001 |
| habitat [land] | -0.63 | -1.02 – -0.25 | 0.002 |
|
log10.body.mass * habitat [land] |
0.07 | -0.07 – 0.22 | 0.311 |
| Observations | 50 | ||
| R2 / R2 adjusted | 0.990 / 0.990 | ||
lm_anova_results <- car::Anova(lm_model)
pander(lm_anova_results)
| Sum Sq | Df | F value | Pr(>F) | |
|---|---|---|---|---|
| log10.body.mass | 76.68 | 1 | 2682 | 1.949e-42 |
| habitat | 1.376 | 1 | 48.12 | 1.134e-08 |
| log10.body.mass:habitat | 0.03001 | 1 | 1.05 | 0.3109 |
| Residuals | 1.315 | 46 | NA | NA |
lm_preds <- predict(lm_model, se.fit = TRUE)
mammal_vt <- mammal_vt %>% mutate(lm_resids = resid(lm_model),
lm_fitted = lm_preds$fit,
lm_fit_lo = lm_preds$fit + 1.96*lm_preds$se.fit,
lm_fit_hi = lm_preds$fit - 1.96*lm_preds$se.fit)
gf_point(lm_resids ~ lm_fitted, data = mammal_vt)
s245::gf_acf(~lm_model)
gf_dhistogram(~lm_resids, data = mammal_vt,
bins = 20) %>%
gf_fitdistr()
Any predictors not shown in a plot are held constant at their mean or most common value.
gf_line(lm_fitted ~ log10.body.mass,
color = ~habitat,
data = mammal_vt) %>%
gf_ribbon(lm_fit_lo + lm_fit_hi ~ log10.body.mass,
color = ~habitat, fill = ~habitat) %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors))
gf_point(log10vt ~ lm_fitted, data = mammal_vt,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(Vt)',
x = 'Model-predicted log10(Vt)',
title = 'Linear Regresssion (no phylogeny)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are required to be “further” apart than any other two.
re_model <- glmmTMB(log10vt ~ log10.body.mass * habitat + (1 | order/genus/species),
data = mammal_vt)
summary(re_model)
## Family: gaussian ( identity )
## Formula:
## log10vt ~ log10.body.mass * habitat + (1 | order/genus/species)
## Data: mammal_vt
##
## AIC BIC logLik deviance df.resid
## -27.9 -12.6 22.0 -43.9 42
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## species:(genus:order) (Intercept) 0.0108807 0.10431
## genus:order (Intercept) 0.0069420 0.08332
## order (Intercept) 0.0004543 0.02132
## Residual 0.0073999 0.08602
## Number of obs: 50, groups:
## species:(genus:order), 47; genus:order, 42; order, 9
##
## Dispersion estimate for gaussian family (sigma^2): 0.0074
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.61392 0.19456 8.295 < 2e-16 ***
## log10.body.mass 0.93222 0.07429 12.548 < 2e-16 ***
## habitatland -0.65494 0.19922 -3.288 0.00101 **
## log10.body.mass:habitatland 0.07660 0.07244 1.057 0.29036
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
| log 10 vt | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.61 | 1.23 – 2.00 | <0.001 |
| log10.body.mass | 0.93 | 0.79 – 1.08 | <0.001 |
| habitat [land] | -0.65 | -1.05 – -0.26 | 0.001 |
|
log10.body.mass * habitat [land] |
0.08 | -0.07 – 0.22 | 0.290 |
| Random Effects | |||
| σ2 | 0.01 | ||
| τ00 species:(genus:order) | 0.01 | ||
| τ00 genus:order | 0.01 | ||
| τ00 order | 0.00 | ||
| ICC | 0.71 | ||
| N species | 47 | ||
| N genus | 42 | ||
| N order | 9 | ||
| Observations | 50 | ||
| Marginal R2 / Conditional R2 | 0.991 / 0.997 | ||
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| log10.body.mass | 1594 | 1 | 0 |
| habitat | 40.93 | 1 | 1.58e-10 |
| log10.body.mass:habitat | 1.118 | 1 | 0.2904 |
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_vt <- mammal_vt %>%
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_vt, 'fitted-models/vt-data.RDS')
saveRDS(re_model, 'fitted-models/vt-re-model.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_vt)
acf(resid(re_model))
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_vt) %>%
gf_fitdistr()
mammal_vt <- mammal_vt %>%
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10vt ~ 10^log10.body.mass,
color = ~Habitat,
text = ~animal,
data = mammal_vt) %>%
gf_line(10^re_ave_fitted ~ 10^log10.body.mass,
color = ~Habitat,
data = mammal_vt,
text = ~Habitat) %>%
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10.body.mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) %>%
gf_labs(x = 'Body Mass (kg)', y = 'Tidal Volume (mL)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_theme(scale_fill_manual(values = my_colors)) %>%
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)))
# For tidal volume, terrestrial (Stahl 1967): SV (ml) = 7.69 * body mass^1.04
# For tidal volume, marine (Fahlman 2020, sea lion/cetacean papers): SV (l) = 0.0372 * body mass^0.97
pub_models <- data.frame(mass = seq(from = 0.001, by = 10, to = 70000)) %>% # data.frame(mass = seq(from = 0.008, by = 0.1, to = 5200)) %>%
mutate(vt_stahl = 7.69 * mass^1.04,
vt_fahlman = 37.2 * mass^0.97)
re_preds <- re_preds %>%
gf_line(vt_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl') %>%
gf_line(vt_fahlman ~ mass, data = pub_models, color = 'black',
linetype = 'dashed', text = ~'Fahlman') %>%
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) %>%
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds %>% gf_theme(legend.position='none') %>% ggplotly(tooltip = 'text')
Figure 5.1: Observed and predicted tidal volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) and black dashed line on Fahlman (2020).
gf_point(log10vt ~ re_ind_fitted, data = mammal_vt,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(Vt)',
x = 'Model-predicted, Species-specific log10(VT)',
title = 'Mixed-effects Model (RE of Order/Genus)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
The graph above is a bit “cheating” as we have a random effect of species, but there are only 1-2 measurements for most of the species (nearly guaranteeing that our estimates will be nearly perfect). What if we also check out the predictions accounting for the modeled effects of Order and Genus, but predicting for the “average” species in each Genus?
no_species <- mammal_vt %>%
mutate(species = NA)
re_genus_level_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL,
newdata = no_species)
gf_point(log10vt ~ re_genus_level_preds$fit, data = mammal_vt,
alpha = 0.1) %>%
gf_labs(y = 'Observed log10(Vt)',
x = 'Model-predicted, Genus-specific log10(Vt)',
title = 'Mixed-effects Model (RE of Order/Genus)') %>%
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
#Read in the trees from Upham et al
tree_path <- paste("data/upham-trees/Completed_5911sp_topoCons_FBDasZhouEtAl")
tree_files <- list.files(tree_path)
all_trees <- list()
for (i in 1:length(tree_files)){
all_trees[[i]] <- read.tree(paste0(tree_path, '/',
tree_files[i]))
if (i == 1){
treeset <- all_trees[[i]]
}else{
treeset <- c(treeset, all_trees[[i]])
}
}
all_tip_labels <- purrr::map(treeset, "tip.label")
all_tip_labels <- purrr::map(all_tip_labels,
function(x)
stringr::str_replace_all(x, pattern = '_',
replacement = ' '))
# get list of species that are in ALL the trees
for (t in 1:length(all_tip_labels)){
if (t == 1){
tip_labs <- all_tip_labels[[t]]
}else{
tip_labs <- intersect(tip_labs, all_tip_labels[[t]])
}
}
# keep only the species in mammal_bmr that are in all the trees
# on 4/14 this removes 1 species.
pgls_data <- mammal_vt %>%
filter(animal %in% tip_labs) %>%
droplevels()
taxonomy <- read_csv('data/upham-trees/taxonomy_mamPhy_5911species.csv')
Fit models, one model for every tree in our list.
pgls_models <- list()
for (t in c(1:length(treeset))){
# make sure there is only one row of data per species (seems dubious??)
pgls_rep_data <- pgls_data %>%
group_by(animal) %>%
sample_n(1) %>%
ungroup
#Reduce the tree to only include those species in the data set
refit_tree <- treeset[[t]]
refit_tree$tip.label <- str_replace_all(refit_tree$tip.label, '_', ' ')
refit_tree <- drop.tip(refit_tree,
setdiff(refit_tree$tip.label,
levels(pgls_rep_data %>% pull(animal))))
#Order the data set so that it is in the same order as the tip labels of the tree
pgls_rep_data <- left_join(data.frame(tree.tip.label = refit_tree$tip.label),
pgls_rep_data,
by = c('tree.tip.label' = 'animal'),
keep = TRUE)
# fit the model
pgls_models[[t]] <- tryCatch({
fittd <- gls(log10vt ~ log10.body.mass * habitat,
correlation = corPagel(value = 0.8,
phy = refit_tree,
fixed = FALSE,
form = ~animal),
data = pgls_rep_data)
},
error = function(cond){
message(paste('PGLS fit failed for tree', t))
return(NULL)
}
)
}
pgls_models <- pgls_models[!sapply(pgls_models, is.null)]
Note: we tried to fit 101 PGLS models (each with a different tree); of these, model fitting failed for 6.
Combine the many PGLS model runs together into one summary combined model according to Rubin’s rule.
# as.mira takes the list of models and create an object to be used by the mice package
pgls_mira <- as.mira(pgls_models)
# # pool summarise the models using Rubin's rule corrected for small samples
pooled_pgls <- pool(pgls_mira)
pooled_pgls_summ <- summary(pooled_pgls, type = 'all', conf.int = TRUE)
pander(pooled_pgls_summ %>% dplyr::select(term, estimate, std.error, `2.5 %`, `97.5 %`, lambda, fmi))
| term | estimate | std.error | 2.5 % | 97.5 % |
|---|---|---|---|---|
| (Intercept) | 1.636 | 0.2009 | 1.229 | 2.043 |
| log10.body.mass | 0.9392 | 0.07008 | 0.7974 | 1.081 |
| habitatland | -0.648 | 0.195 | -1.043 | -0.2529 |
| log10.body.mass:habitatland | 0.08686 | 0.07294 | -0.06075 | 0.2345 |
| lambda | fmi |
|---|---|
| 0.06091 | 0.1071 |
| 0.0309 | 0.07718 |
| 0.06823 | 0.1144 |
| 0.04048 | 0.08674 |
pgls_anova <- Anova(pgls_mira)
pander(pgls_anova)
| F | num df | den df | missing info | riv | |
|---|---|---|---|---|---|
| log10.body.mass | 2650 | 1 | 125965 | 0.02618 | 0.02688 |
| habitat | 40.7 | 1 | 28984 | 0.05461 | 0.05776 |
| log10.body.mass:habitat | 1.418 | 1 | 52715 | 0.04048 | 0.04219 |
| Pr(>F) | |
|---|---|
| log10.body.mass | 0 |
| habitat | 1.802e-10 |
| log10.body.mass:habitat | 0.2338 |
Alternative approach: using MuMIn to do model averaging. This will not necessarily weight all of the models/trees equally?
pgls_avg <- model.avg(pgls_models,
rank = function(x) 1)
This gives a model that we can more easily make predictions from. The other way (with pool()) is better for getting the ANOVA results.
For the PGLS model, we also want to extract the estimate of \(\Lambda\), which tells us about how the phylogeny is affecting the correlation structure.
Previous approach was to take the mean of the estimates of \(\Lambda\) from all the individual fitted models.
lambda <- mean(unlist(purrr::map(pgls_models, function(x) x$modelStruct$corStruct)))
lambda
## [1] 0.08816833
According to this simple method our estimate is \(\hat{\Lambda} =\) 0.088.
It’s not really clear how to even approach this, since we don’t really any longer expect the residuals to be normal or independent. And based on the data we know there’s not a huge issue with linearity. So…ok?
Cardiac output is the product of heart rate (hr) and stroke volume (sv).
We will use a parametric bootstrap to make predictions of cardiac output as a function of log10(body mass) and habitat based on our random-effect models for hr and sv.
n_boot <- 1000
We will make sets of many (1000) predictions for each of the species that occur in both the hr and sv data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_hr <- readRDS('fitted-models/hr-data.RDS')
hr_mod <- readRDS('fitted-models/hr-re-model.RDS')
mammal_sv <- readRDS('fitted-models/sv-data.RDS')
sv_mod <- readRDS('fitted-models/sv-re-model.RDS')
cardiac_species <- intersect(pull(mammal_hr, animal), pull(mammal_sv, animal))
The two datasets have 24 species in common.
Make dataset for which to make predictions, containing these species.
cardiac_hr <- mammal_hr %>%
dplyr::filter(animal %in% cardiac_species) %>%
dplyr::select(animal, order, genus, species, log10.body.mass, log10hr, habitat) %>%
mutate(across(where(is.factor), factor))
cardiac_sv <- mammal_sv %>%
dplyr::filter(animal %in% cardiac_species) %>%
dplyr::select(animal, order, genus, species, log10.body.mass, log10sv, habitat)
For each row of the dataset, make a set of 1000 model predictions of cardiac output. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (hr or sv) was originally fitted.
First we define custom functions to make the predictions
predict_hr <- function(.){
predict(.,
newdata = cardiac_hr,
re.form = NULL,
type = 'response')
}
predict_sv <- function(.){
predict(.,
newdata = cardiac_sv,
re.form = NULL,
type = 'response')
}
Do the bootstrap(s)
if (!file.exists('fitted-models/hr_boot.RDS')){
hr_boot <- bootMer(hr_mod,
FUN = predict_hr,
nsim = n_boot,
re.form = NULL,
type = 'parametric'
)
saveRDS(hr_boot, 'fitted-models/hr_boot.RDS')
}else{
hr_boot <- readRDS('fitted-models/hr_boot.RDS')
}
if (!file.exists('fitted-models/sv_boot.RDS')){
sv_boot <- bootMer(sv_mod,
FUN = predict_sv,
nsim = n_boot,
re.form = NULL,
type = 'parametric')
saveRDS(sv_boot, 'fitted-models/sv_boot.RDS')
}else{
sv_boot <- readRDS('fitted-models/sv_boot.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted cardiac output (and its median and 95% bootstrap CI):
cardiac_hr <- cardiac_hr %>%
mutate(boot_pred_hr = unlist(apply(t(hr_boot$t), 1, list), recursive = FALSE))
cardiac_sv <- cardiac_sv %>%
mutate(boot_pred_sv = unlist(apply(t(sv_boot$t), 1, list), recursive = FALSE))
cardiac_preds <- inner_join(cardiac_hr, cardiac_sv,
by = c('animal', 'order', 'genus', 'species', 'habitat')) %>%
mutate(boot_pred_cardiac = map2(boot_pred_hr, boot_pred_sv, function(.x, .y) mapply(prod, 10^.x, 10^.y))) %>%
mutate(boot_median_cardiac = map_dbl(boot_pred_cardiac, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_cardiac, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_cardiac, quantile, probs = 0.975, na.rm = TRUE),
mean_mass = ((10^log10.body.mass.x + 10^log10.body.mass.y) / 2) )
# units: ml/stroke * beats/min
plot results
cardiac_mass <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
color = ~habitat,
text = ~animal) %>%
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
color = ~habitat) %>%
gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min)',
x = 'Body Mass (kg)') %>%
gf_theme(scale_color_manual(values = my_colors))
ggplotly(cardiac_mass, tooltip = 'text')
cardiac_mass
cardiac_mass_log <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
color = ~habitat,
text = ~animal) %>%
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
color = ~habitat) %>%
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) %>%
gf_refine(scale_y_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) %>%
gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min))',
x = 'Body Mass (kg)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_lims(x = c(0.001, 101000))
ggplotly(cardiac_mass_log, tooltip = 'text')
cardiac_mass_log
Total ventilation is the product of breathing rate (fr) and tidal volume (vt).
We will use a parametric bootstrap to make predictions of total ventilation as a function of log10(body mass) and habitat based on our random-effect models for fr and vt.
We will make sets of many (1000) predictions for each of the species that occur in both the fr and vt data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_fr <- readRDS('fitted-models/fr-data.RDS')
fr_mod <- readRDS('fitted-models/fr-re-model.RDS')
mammal_vt <- readRDS('fitted-models/vt-data.RDS')
vt_mod <- readRDS('fitted-models/vt-re-model.RDS')
vent_species <- intersect(pull(mammal_fr, animal), pull(mammal_vt, animal))
The two datasets have 31 species in common.
Make datasets for which to make predictions, containing these species.
vent_fr <- mammal_fr %>%
dplyr::filter(animal %in% vent_species) %>%
dplyr::select(animal, order, genus, species, log10.body.mass, log10fr, habitat) %>%
mutate(across(where(is.factor), factor))
vent_vt <- mammal_vt %>%
dplyr::filter(animal %in% vent_species) %>%
dplyr::select(animal, order, genus, species, log10.body.mass, log10vt, habitat)
For each row of the dataset, make a set of 1000 model predictions of ventilation. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (fr or vt) was originally fitted.
First we define custom functions to make the predictions
predict_fr <- function(.){
predict(.,
newdata = vent_fr,
re.form = NULL,
type = 'response')
}
predict_vt <- function(.){
predict(.,
newdata = vent_vt,
re.form = NULL,
type = 'response')
}
Do the bootstrap(s)
if (!file.exists('fitted-models/fr_boot.RDS')){
fr_boot <- bootMer(fr_mod,
FUN = predict_fr,
nsim = n_boot,
re.form = NULL,
type = 'parametric'
)
saveRDS(fr_boot, 'fitted-models/fr_boot.RDS')
}else{
fr_boot <- readRDS('fitted-models/fr_boot.RDS')
}
if (!file.exists('fitted-models/vt_boot.RDS')){
vt_boot <- bootMer(vt_mod,
FUN = predict_vt,
nsim = n_boot,
re.form = NULL,
type = 'parametric')
saveRDS(vt_boot, 'fitted-models/vt_boot.RDS')
}else{
vt_boot <- readRDS('fitted-models/vt_boot.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted ventilation (and its median and 95% bootstrap CI):
vent_fr <- vent_fr %>%
mutate(boot_pred_fr = unlist(apply(t(fr_boot$t), 1, list), recursive = FALSE))
vent_vt <- vent_vt %>%
mutate(boot_pred_vt = unlist(apply(t(vt_boot$t), 1, list), recursive = FALSE))
vent_preds <- inner_join(vent_fr, vent_vt,
by = c('animal', 'order', 'genus', 'species', 'habitat')) %>%
mutate(boot_pred_vent = map2(boot_pred_fr, boot_pred_vt, function(.x, .y) mapply(prod, 10^.x, 10^.y))) %>%
mutate(boot_median_vent = map_dbl(boot_pred_vent, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_vent, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_vent, quantile, probs = 0.975, na.rm = TRUE),
mean_mass = ((10^log10.body.mass.x + 10^log10.body.mass.y) / 2) )
# units: breaths/minute (?) * mL / breath ---> mL/min
plot results
vent_mass <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
color = ~habitat,
text = ~animal) %>%
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
color = ~habitat) %>%
gf_labs(title = 'Expected Ventilation\nfor the species modeled',
y = 'Predicted Ventilation (mL/min)',
x = 'Body Mass (kg)') %>%
gf_theme(scale_color_manual(values = my_colors))
ggplotly(vent_mass, tooltip = 'text')
vent_mass
vent_mass_log <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
color = ~habitat,
text = ~animal) %>%
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
color = ~habitat) %>%
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) %>%
gf_refine(scale_y_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) %>%
gf_labs(title = 'Expected Ventilation\nfor the species modeled',
y = 'Predicted Ventilation (mL/min)',
x = 'Body Mass (kg)') %>%
gf_theme(scale_color_manual(values = my_colors)) %>%
gf_lims(x = c(0.001, 101000))
ggplotly(vent_mass_log, tooltip = 'text')
vent_mass_log